* Session settings; each one is independent of the others.
set logtype text
set more off
pause off
set memory 500M

* Scratch dataset name used to carry the working data between the merge steps
tempfile currtemp

*************** DESCRIPTION *************************************
* Loads PDII survey output, computes occupational licensing 
* percentages and maps to BEA segments
*
*	Inputs: 	0.raw_inputs\PDII_RDD_Survey --> Raw survey data
*				from Kleiner-Krueger
*	Outputs: 	2.intermediate\license_out --> weighted avg. occ. 
*				licensing rate by BEA segment
*****************************************************************

* Load the raw survey file and keep only the respondent id, the two
* licensing questions (q11, q11a) and the industry code.
* The path uses forward slashes: Stata accepts them on every platform,
* whereas a backslash separator only resolves on Windows.
use 0.raw_inputs/PDII_RDD_Survey.dta, clear
keep baseid q11 q11a industry

* Recode each question into a 0/1 indicator:
*   answer code 1 -> 1, answer code 2 -> 0, anything else -> missing.
generate q11ind  = (q11  == 1) if inlist(q11,  1, 2)
generate q11aind = (q11a == 1) if inlist(q11a, 1, 2)

* Generate 3-digit naics: convert the industry code to text, take its
* first three characters, then convert that prefix back to a number.
tostring industry, replace
generate naics = substr(industry, 1, 3)
destring naics, replace

* Only baseid, the two indicators and naics are carried forward.
drop industry q11 q11a
save `currtemp'

* Map to BEA codes
* Load the NAICS-to-BEA crosswalk (the first spreadsheet row supplies the
* variable names) and attach the survey records saved in the tempfile.
* The path uses forward slashes so it resolves on every platform, not only
* on Windows.
import excel 1.user_inputs/NAICS2BEA.xlsx, firstrow clear

* 1:m requires naics to be unique in the crosswalk. keep(matched using)
* retains every survey record, whether or not its naics has a crosswalk row,
* and drops crosswalk rows that no survey record refers to.
merge 1:m naics using `currtemp', keep(matched using) nogen
save `currtemp', replace

* Map to BEA segments
* Attach the segment key by beacode. keep(matched) drops every record whose
* beacode has no entry in levelkey, which includes survey records that had
* no crosswalk row in the previous step.
* Paths use forward slashes so they resolve on every platform.
merge m:1 beacode using 6.Temp/levelkey, keep(matched) nogen

* NOTE(review): ind_short is not created in this script; presumably it is
* supplied by levelkey or by the crosswalk -- confirm.
generate indcode = ind_short

* Aggregate
* One row per indcode holding the mean of each 0/1 indicator, i.e. the share
* of non-missing responses coded 1 within that segment.
collapse (mean) q11ind q11aind, by(indcode)
rename q11ind a1m_licensed
rename q11aind a1m_licreq

compress
save 2.intermediate/license_out, replace

* End to end manual sample check
* Displays the gap between the computed a1m_licensed for "Agriculture" and
* the reference value 0.254545455; the result is inspected by eye in the
* summary output (nothing is asserted). The helper variable is created
* after the save above and dropped again, so it never reaches the output file.
generate test1 = a1m_licensed - 0.254545455 if indcode == "Agriculture"
summarize test*
drop test*
